In [3]:
import pandas as pd
import plotly.express as px
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

import sqlite3 as sq3
import pandas as pd
sns.set()
In [4]:
con = sq3.connect("opioid.db")

annual = pd.read_sql_query("SELECT * from annual", con)

# you have to close the connection
con.close

annual.head
Out[4]:
<bound method NDFrame.head of              BUYER_COUNTY BUYER_STATE  year count DOSAGE_UNIT countyfips
0          1    ABBEVILLE          SC  2006   877      363620      45001
1          2    ABBEVILLE          SC  2007   908      402940      45001
2          3    ABBEVILLE          SC  2008   871      424590      45001
3          4    ABBEVILLE          SC  2009   930      467230      45001
4          5    ABBEVILLE          SC  2010  1197      539280      45001
...      ...          ...         ...   ...   ...         ...        ...
27753  27754           NA          NV  2007   447      200600         NA
27754  27755           NA          NV  2008     5        2200         NA
27755  27756           NA          OH  2006    23        5100         NA
27756  27757           NA          PR  2006    10       17800         NA
27757  27758           NA          PR  2007     2        1300         NA

[27758 rows x 7 columns]>
In [5]:
annual.loc[(annual.BUYER_STATE == 'AR') & (annual.BUYER_COUNTY == 'MONTGOMERY'), 'countyfips'] = 5097
annual
Out[5]:
BUYER_COUNTY BUYER_STATE year count DOSAGE_UNIT countyfips
0 1 ABBEVILLE SC 2006 877 363620 45001
1 2 ABBEVILLE SC 2007 908 402940 45001
2 3 ABBEVILLE SC 2008 871 424590 45001
3 4 ABBEVILLE SC 2009 930 467230 45001
4 5 ABBEVILLE SC 2010 1197 539280 45001
... ... ... ... ... ... ... ...
27753 27754 NA NV 2007 447 200600 NA
27754 27755 NA NV 2008 5 2200 NA
27755 27756 NA OH 2006 23 5100 NA
27756 27757 NA PR 2006 10 17800 NA
27757 27758 NA PR 2007 2 1300 NA

27758 rows × 7 columns

In [6]:
annual.loc[(annual.BUYER_COUNTY == 'NA')]
Out[6]:
BUYER_COUNTY BUYER_STATE year count DOSAGE_UNIT countyfips
27741 27742 NA AE 2006 2 330 NA
27742 27743 NA CA 2006 47 12600 NA
27743 27744 NA CT 2006 305 78700 NA
27744 27745 NA CT 2007 112 30900 NA
27745 27746 NA CT 2008 48 15000 NA
27746 27747 NA FL 2006 9 900 NA
27747 27748 NA FL 2007 7 700 NA
27748 27749 NA GA 2006 114 51700 NA
27749 27750 NA IA 2006 7 2300 NA
27750 27751 NA IN 2006 292 39300 NA
27751 27752 NA MA 2006 247 114900 NA
27752 27753 NA NV 2006 380 173600 NA
27753 27754 NA NV 2007 447 200600 NA
27754 27755 NA NV 2008 5 2200 NA
27755 27756 NA OH 2006 23 5100 NA
27756 27757 NA PR 2006 10 17800 NA
27757 27758 NA PR 2007 2 1300 NA
In [7]:
annual.drop(range(27741, 27758), axis=0)
Out[7]:
BUYER_COUNTY BUYER_STATE year count DOSAGE_UNIT countyfips
0 1 ABBEVILLE SC 2006 877 363620 45001
1 2 ABBEVILLE SC 2007 908 402940 45001
2 3 ABBEVILLE SC 2008 871 424590 45001
3 4 ABBEVILLE SC 2009 930 467230 45001
4 5 ABBEVILLE SC 2010 1197 539280 45001
... ... ... ... ... ... ... ...
27736 27737 ZAVALA TX 2010 248 200100 48507
27737 27738 ZAVALA TX 2011 406 244800 48507
27738 27739 ZAVALA TX 2012 473 263700 48507
27739 27740 ZAVALA TX 2013 399 186700 48507
27740 27741 ZAVALA TX 2014 162 148930 48507

27741 rows × 7 columns

In [8]:
annual = annual.assign(pills = pd.to_numeric((annual.DOSAGE_UNIT))/1000000)
annual_year = annual
annual_year = annual_year.groupby(['year']).pills.mean().reset_index().rename(columns = {'pills' : 'average_pills'})
In [9]:
fig = px.scatter(annual_year, x = "year", y='average_pills')
fig.show()
In [ ]: